samples <- read.table(file.path("Analysis/SraRunTable.txt"), sep = ",", header = TRUE) %>%
dplyr::mutate(cell_line = ifelse(grepl("A673", x = source_name), "A673", "TC71")) %>%
dplyr::mutate(condition = ifelse(grepl("WT|siCT", x = GENOTYPE), "Control", "Treatment"))
A673_samples <- samples %>%
dplyr::filter(GENOTYPE %in% c("WT", "SA2 KO") & cell_line == "A673")
TC71_samples <- samples %>%
dplyr::filter(GENOTYPE %in% c("SA2 KO", "WT") & cell_line == "TC71")
S2KO_samples <- samples %>%
dplyr::filter(GENOTYPE == "SA2 KO")
A673_salmon_files <- file.path("Salmon/salmon.out", A673_samples$Run, "quant.sf") %>%
setNames(object = , A673_samples$Run)
TC71_salmon_files <- file.path("Salmon/salmon.out", TC71_samples$Run, "quant.sf") %>%
setNames(object = , TC71_samples$Run)
S2KO_salmon_files <- file.path("Salmon/salmon.out", S2KO_samples$Run, "quant.sf") %>%
setNames(object = , S2KO_samples$Run)
ensdb <- EnsDb.Hsapiens.v86
transcripts <- transcripts(ensdb, columns = c(listColumns(ensdb, "tx"), "gene_name"),
return.type = "data.frame") %>%
as_tibble() %>%
dplyr::select(tx_id, gene_name)
A673_txi <- tximport(A673_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)
TC71_txi <- tximport(TC71_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)
S2KO_txi <- tximport(S2KO_salmon_files, type = "salmon", tx2gene = transcripts, ignoreTxVersion = TRUE)
A673_dds_txi <- DESeqDataSetFromTximport(A673_txi, colData = A673_samples, design = ~condition)
TC71_dds_txi <- DESeqDataSetFromTximport(TC71_txi, colData = TC71_samples, design = ~condition)
S2KO_dds_txi <- DESeqDataSetFromTximport(S2KO_txi, colData = S2KO_samples, design = ~cell_line)
vst <- vst(A673_dds_txi)
A673_PCA <- plotPCA(vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
A673_percentVar <- round(100 * attr(A673_PCA, "percentVar"))
ggplot(A673_PCA, aes(PC1,PC2, color = GENOTYPE)) +
geom_point(size = 3) +
ggtitle("PCA Plot for A673 STAG2KO samples" ) +
xlab(paste0("PC1: ", A673_percentVar[1], "% variance")) +
ylab(paste0("PC2: ", A673_percentVar[2], "% variance")) +
coord_fixed()
TC71_vst <- vst(TC71_dds_txi)
TC71_PCA <- plotPCA(TC71_vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
TC71_percentVar <- round(100 * attr(TC71_PCA, "percentVar"))
ggplot(TC71_PCA, aes(PC1,PC2, color = GENOTYPE)) +
geom_point(size = 3) +
ggtitle("PCA Plot for TC71 STAG2KO samples" ) +
xlab(paste0("PC1: ", A673_percentVar[1], "% variance")) +
ylab(paste0("PC2: ", A673_percentVar[2], "% variance")) +
coord_fixed()
S2KO_vst <- vst(S2KO_dds_txi)
S2KO_PCA <- plotPCA(S2KO_vst, intgroup = c("cell_line", "GENOTYPE"), returnData = TRUE)
S2KO_percentVar <- round(100 * attr(S2KO_PCA, "percentVar"))
ggplot(S2KO_PCA, aes(PC1,PC2, color = cell_line)) +
geom_point(size = 3) +
ggtitle("PCA Plot for All STAG2KO samples" ) +
xlab(paste0("PC1: ", S2KO_percentVar[1], "% variance")) +
ylab(paste0("PC2: ", S2KO_percentVar[2], "% variance")) +
coord_fixed()
A673_metadata <- colData(A673_dds_txi)
TC71_metadata <- colData(TC71_dds_txi)
all_metadata <- rbind(A673_metadata,TC71_metadata)
all_metadata %>%
as.data.frame() %>%
dplyr::select(GENOTYPE, cell_line) %>%
kbl(caption = "Table 1: Sample Overview") %>%
kable_styling(bootstrap_options = "striped", full_width = T, html_font = "Cambria")
| GENOTYPE | cell_line | |
|---|---|---|
| SRR9326185 | SA2 KO | A673 |
| SRR9326186 | SA2 KO | A673 |
| SRR9326187 | SA2 KO | A673 |
| SRR9326195 | WT | A673 |
| SRR9326196 | WT | A673 |
| SRR9326197 | WT | A673 |
| SRR9326198 | SA2 KO | TC71 |
| SRR9326199 | SA2 KO | TC71 |
| SRR9326200 | SA2 KO | TC71 |
| SRR9326201 | SA2 KO | TC71 |
| SRR9326202 | SA2 KO | TC71 |
| SRR9326203 | SA2 KO | TC71 |
| SRR9326208 | WT | TC71 |
| SRR9326209 | WT | TC71 |
| SRR9326210 | WT | TC71 |
EnhancedVolcano(A673_sig_ordered_result,
lab = A673_sig_ordered_result$Gene_name,
x = "log2FoldChange",
y = "pvalue",
title = "Siginifcant Genes for STAG2KO in A673 cells",
subtitle = "",
pointSize = 1.0,
labSize = 4.0,
xlim = c(min(A673_sig_ordered_result$log2FoldChange), max(A673_sig_ordered_result$log2FoldChange)),
ylim = c(0, 300)
)
EnhancedVolcano(TC71_sig_ordered_result,
lab = TC71_sig_ordered_result$Gene_name,
x = "log2FoldChange",
y = "pvalue",
title = "Siginifcant Genes for STAG2KO in TC71 Cells versus Control",
subtitle = "",
pointSize = 1.0,
labSize = 4.0,
xlim = c(min(TC71_sig_ordered_result$log2FoldChange), max(TC71_sig_ordered_result$log2FoldChange)),
ylim = c(0, 200)
)
EnhancedVolcano(S2KO_sig_ordered_result,
lab = S2KO_sig_ordered_result$Gene_name,
x = "log2FoldChange",
y = "pvalue",
title = "Siginifcant Genes for between the STAG2KO samples in A673 and TC71 cell lines",
subtitle = "",
pointSize = 1.0,
labSize = 4.0,
xlim = c(min(S2KO_sig_ordered_result$log2FoldChange), max(S2KO_sig_ordered_result$log2FoldChange)),
ylim = c(0, 200)
)
A673_table_result <- dplyr::select(A673_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)
datatable(A673_table_result, class = 'cell-border stripe',
caption = "Table 2: A672 STAG2KO Differentally Significant Genes")
TC71_table_result <- dplyr::select(TC71_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)
datatable(TC71_table_result, class = 'cell-border stripe',
caption = "Table 3: TC71 STAG2KO Differentally Significant Genes")
S2KO_table_result <- dplyr::select(S2KO_sig_ordered_result, Gene_name, log2FoldChange, stat, pvalue, padj)
datatable(S2KO_table_result, class = 'cell-border stripe',
caption = "Table 4: STAG2KO samples Differentally Significant Genes")
A673_heatmap <- pheatmap(A673_sig_norm_dds_counts,
main = "Top 10 Over- and Under- expressed A673 STAG2KO DEGs",
color = palette(200),
cluster_rows = FALSE,
cluster_cols = FALSE,
show_rownames = TRUE,
annotation = dplyr::select(A673_heat_meta, condition),
scale = "row")
TC71_heatmap <- pheatmap(TC71_sig_norm_dds_counts,
main = "Top 10 Over- and Under- expressed TC71 STAG2KO DEGs",
color = palette(200),
cluster_rows = FALSE,
cluster_cols = FALSE,
show_rownames = TRUE,
annotation = dplyr::select(TC71_heat_meta, condition),
scale = "row")
S2KO_heatmap <- pheatmap(S2KO_sig_norm_dds_counts,
main = "Top 10 Over- and Under- expressed A673 STAG2KO DEGs",
color = palette(200),
cluster_rows = FALSE,
cluster_cols = FALSE,
show_rownames = TRUE,
annotation = dplyr::select(S2KO_heat_meta, cell_line),
scale = "row")
A673_ordered_gse <- A673_gse %>%
dplyr::arrange(desc(abs(NES)))
A673_ordered_gse_df <- A673_ordered_gse %>%
as_tibble() %>%
dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)
datatable(A673_ordered_gse_df, class = 'cell-border stripe',
caption = "Table 4: GSEA enrichment results for A673 STAG2KO significant DEGs")
gseaplot2(A673_ordered_gse, geneSetID = 1, title = A673_ordered_gse$Description[1])
TC71_ordered_gse <- TC71_gse %>%
dplyr::arrange(desc(abs(NES)))
TC71_ordered_gse_df <- TC71_ordered_gse %>%
as_tibble() %>%
dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)
datatable(TC71_ordered_gse_df, class = 'cell-border stripe',
caption = "Table 5: GSEA enrichment results for TC71 STAG2KO significant DEGs")
gseaplot2(TC71_ordered_gse, geneSetID = 1, title = TC71_ordered_gse$Description[1])
S2KO_ordered_gse <- S2KO_gse %>%
dplyr::arrange(desc(abs(NES)))
S2KO_ordered_gse_df <- S2KO_ordered_gse %>%
as_tibble() %>%
dplyr::select(ONTOLOGY, ID, Description, NES, pvalue, p.adjust, qvalue)
datatable(S2KO_ordered_gse_df, class = 'cell-border stripe',
caption = "Table 5: GSEA enrichment results for all STAG2KO samples significant DEGs")
gseaplot2(S2KO_ordered_gse, geneSetID = 1, title = S2KO_ordered_gse$Description[1])
A673_resRmd <- llply(names(A673_gene_list), function(groupNow) {
genesNow <- A673_gene_list[[groupNow]]
response <- httr::POST(
url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)
response <- jsonlite::fromJSON(httr::content(response, as = "text"))
permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",
response$shortId[1])
knitr::knit_child(text = c(
'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
),
envir = environment(),
quiet = TRUE)
})
cat(unlist(A673_resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.
TC71_resRmd <- llply(names(TC71_gene_list), function(groupNow) {
genesNow <- TC71_gene_list[[groupNow]]
response <- httr::POST(
url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)
response <- jsonlite::fromJSON(httr::content(response, as = "text"))
permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",
response$shortId[1])
knitr::knit_child(text = c(
'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
),
envir = environment(),
quiet = TRUE)
})
cat(unlist(TC71_resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.
S2KO_resRmd <- llply(names(S2KO_gene_list), function(groupNow) {
genesNow <- S2KO_gene_list[[groupNow]]
response <- httr::POST(
url = 'https://maayanlab.cloud/Enrichr/addList',
body = list(
'list' = paste0(genesNow, collapse = "\n"),
'description' = groupNow
)
)
response <- jsonlite::fromJSON(httr::content(response, as = "text"))
permalink <- paste0("https://maayanlab.cloud/Enrichr/enrich?dataset=",
response$shortId[1])
knitr::knit_child(text = c(
'### `r groupNow`',
'',
'Enrichr Link: <a href="`r permalink`" target="_blank">`r groupNow`</a>.',
''
),
envir = environment(),
quiet = TRUE)
})
cat(unlist(S2KO_resRmd), sep = '\n')
Enrichr Link: Over-expressed.
Enrichr Link: Under-expressed.